package com.rrd.hive.udf;

import java.io.BufferedWriter;
import java.io.ByteArrayOutputStream;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;

import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.io.Text;

/**
 * Hive UDF that passes a string through a GB18030 encode/decode round trip,
 * plus a few static helpers used for ad-hoc encoding experiments.
 */
public class StrToUnicodeUDF extends UDF {

    /** Charset name used for every encode/decode round trip in this class. */
    private static final String GB18030 = "GB18030";

    /**
     * Re-encodes the input through GB18030 and wraps the result in a {@link Text}.
     *
     * @param str the value to convert; may be {@code null}
     * @return an empty {@code Text} when {@code str} is {@code null} or empty,
     *         otherwise a {@code Text} holding the round-tripped string
     * @throws UnsupportedEncodingException if the JVM does not provide GB18030
     */
    public Text evaluate(String str) throws UnsupportedEncodingException {
        if (str == null || str.isEmpty()) {
            return new Text("");
        }
        return new Text(roundTripGb18030(str));
    }

    /**
     * Converts a string to a sequence of {@code \}{@code uXXXX} escapes, one
     * per UTF-16 code unit (so a supplementary character produces two
     * escapes, one for each surrogate).
     *
     * @param s the string to convert; {@code null} is treated as empty
     * @return the escaped form, with every code unit written as exactly four
     *         lowercase hex digits
     */
    public static String string2Unicode(String s) {
        if (s == null) {
            return "";
        }

        // Each code unit expands to six characters: backslash, 'u', four digits.
        StringBuilder unicode = new StringBuilder(s.length() * 6);
        for (int i = 0; i < s.length(); i++) {
            String hex = Integer.toHexString(s.charAt(i));
            unicode.append("\\u");
            // Left-pad to four digits; without this 'a' would become the
            // malformed escape "\" + "u61".
            for (int pad = hex.length(); pad < 4; pad++) {
                unicode.append('0');
            }
            unicode.append(hex);
        }
        return unicode.toString();
    }

    /**
     * Debug helper working on two fixed paths. It reads {@code d:/31/a.txt}
     * as UTF-8 and prints its GB18030 round trip to stdout, then writes a
     * fixed sample address to {@code d:/31/c.txt} encoded as GB18030 and
     * prints that sample's round trip as well.
     *
     * <p>Despite the name, the content of the input file is only echoed; it
     * is not written to the output file. I/O failures are reported on stderr
     * and otherwise ignored.
     */
    public static void cpfile() {
        String p1 = "d:/31/a.txt";
        String p2 = "d:/31/c.txt";
        String sample = "广东省吴川市塘㙍镇杨屋村257号101沄房";

        // try-with-resources closes all three streams even when a step fails.
        try (InputStream in = new FileInputStream(p1);
             OutputStream os = new FileOutputStream(p2);
             BufferedWriter writer = new BufferedWriter(
                 new OutputStreamWriter(os, GB18030))) {
            String s = new String(readFully(in), "UTF-8");
            System.out.println(roundTripGb18030(s));
            writer.write(sample);
            System.out.println(roundTripGb18030(sample));
            writer.flush();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Small manual experiment: prints the UTF-16 length of a sample string,
     * the length of its GB18030 encoding, and those bytes decoded as GBK.
     *
     * @param args ignored
     * @throws UnsupportedEncodingException if GB18030 or GBK is unavailable
     */
    public static void main(String[] args) throws UnsupportedEncodingException {
        String s = "广东省吴川市塘㙍镇杨屋";
        byte[] arr = s.getBytes(GB18030);
        System.out.println(s.length());
        System.out.println(arr.length);

        System.out.println(new String(arr, "GBK"));
        System.out.println("--------------------");
    }

    /** Encodes {@code value} as GB18030 and decodes the bytes back to a string. */
    private static String roundTripGb18030(String value)
            throws UnsupportedEncodingException {
        return new String(value.getBytes(GB18030), GB18030);
    }

    /** Reads {@code in} until end of stream and returns everything that was read. */
    private static byte[] readFully(InputStream in) throws IOException {
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        byte[] chunk = new byte[8192];
        int n;
        // available() is only an estimate and a single read() may return
        // fewer bytes than requested, so keep reading until EOF.
        while ((n = in.read(chunk)) != -1) {
            buffer.write(chunk, 0, n);
        }
        return buffer.toByteArray();
    }

}
